cell_type_name = "CD8T"
file_tag = sprintf("%s_%s", cell_type_name, graph_weight)
assayed_genes = scan(sprintf("output/gene_list_%s.txt", file_tag),
what = character(), sep="\n")
gene_sets = scan(sprintf("output/name_s_%s.txt", file_tag),
what = character(), sep="\n")
gene_sets = sapply(gene_sets, strsplit, split=",")
n_genes = sapply(gene_sets, length)
names(n_genes) = NULL
summary(n_genes)## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 15.00 19.75 20.00 20.00 21.00 22.00
## [1] 40
## [1] 15 16 18 19 19 19 19 19 19 19 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20
## [26] 20 21 21 21 21 21 21 21 21 21 21 22 22 22 22
## [1] 800
bioMart.All the gene symbols that can be found in bioMart are
consistent with what we have. So no need to run it.
ensembl = useMart("ensembl", dataset = "hsapiens_gene_ensembl")
gene_BM = getBM(attributes = c("hgnc_symbol", "external_gene_name"),
filters = "external_gene_name",
values = assayed_genes,
mart = ensembl)
length(assayed_genes)
dim(gene_BM)
gene_BM[1:2,]
table(assayed_genes %in% gene_BM$external_gene_name)
t1 = table(gene_BM$external_gene_name)
dup = names(t1)[t1 > 1]
gene_BM[gene_BM$external_gene_name %in% dup,]
table(gene_BM$hgnc_symbol == gene_BM$external_gene_name)
w2kp = which(gene_BM$hgnc_symbol != gene_BM$external_gene_name)
gene_BM[w2kp,]alias2Symbol function from
limma.a2s = rep(NA, length(assayed_genes))
for(i in 1:length(assayed_genes)){
gi = assayed_genes[i]
ai = alias2Symbol(gi)
if(length(ai) > 1){
print(gi)
print(ai)
}
a2s[i] = ai[1]
}## [1] "C7orf55"
## [1] "FMC1-LUC7L2" "FMC1"
## [1] "MARS"
## [1] "MARS1" "SLA2"
## [1] "QARS"
## [1] "EPRS1" "QARS1"
## [1] "APITD1"
## [1] "CENPS-CORT" "CENPS"
## [1] "HIST1H2BC"
## [1] "H2BC5" "H2BC4"
##
## FALSE TRUE
## 1120 139
##
## FALSE TRUE <NA>
## 71 1049 139
gene_info = data.table(sym_in_data = assayed_genes, sym_limma = a2s)
gene_info[sym_in_data != sym_limma,]## sym_in_data sym_limma
## 1: ZNRD1 POLR1H
## 2: C2orf43 LDAH
## 3: FAM45A DENND10
## 4: MTERF MTERF1
## 5: NARG2 ICE2
## 6: C6orf203 MTRES1
## 7: HRSP12 RIDA
## 8: FOPNL CEP20
## 9: WARS WARS1
## 10: LINS LINS1
## 11: RRNAD1 METTL25B
## 12: FAM122B PABIR2
## 13: WDYHV1 NTAQ1
## 14: HIST1H2BD H2BC5
## 15: AGPAT6 GPAT4
## 16: C1orf123 CZIB
## 17: KIAA1841 SANBR
## 18: C2orf44 WDCP
## 19: TMEM155 SMIM43
## 20: C7orf55 FMC1-LUC7L2
## 21: C9orf89 CARD19
## 22: C11orf82 DDIAS
## 23: XRCC6BP1 ATP23
## 24: MARS MARS1
## 25: C19orf55 PROSER3
## 26: HIST1H2AE H2AC8
## 27: QARS EPRS1
## 28: APITD1 CENPS-CORT
## 29: CCDC101 SGF29
## 30: PDDC1 GATD1
## 31: EFCAB4A CRACR2B
## 32: EIF2S3L EIF2S3B
## 33: HIST1H2BC H2BC5
## 34: WRB GET1
## 35: TMEM194B NEMP2
## 36: HIST1H4C H4C3
## 37: HIST1H4J H4C11
## 38: ZNF720 KRBOX5
## 39: HIST1H2BH H2BC9
## 40: FAM49A CYRIA
## 41: TTC37 SKIC3
## 42: C1orf85 GLMP
## 43: C9orf114 SPOUT1
## 44: HIST2H2AA4 H2AC19
## 45: HIST2H2BF H2BC18
## 46: C10orf128 TMEM273
## 47: TCTEX1D2 DYNLT2B
## 48: C17orf89 NDUFAF8
## 49: PHBP3 PHB1P3
## 50: SEPT7P7 SEPTIN7P7
## 51: LINC00493 SMIM26
## 52: HIST1H2BN H2BC15
## 53: ATP5J2 ATP5MF
## 54: AKAP2 PALM2AKAP2
## 55: WDR65 CFAP57
## 56: SRP14-AS1 SRP14-DT
## 57: CTC-436P18.1 SMIM15-AS1
## 58: HIST1H3G H3C8
## 59: EFTUD1P1 EFL1P1
## 60: ATP5A1P3 ATP5F1AP3
## 61: CCDC109B MCUB
## 62: C1orf63 RSRP1
## 63: TMEM66 SARAF
## 64: PCNXL2 PCNX2
## 65: ACRC GCNA
## 66: AMICA1 JAML
## 67: PRMT10 PRMT9
## 68: FAM102A EEIG1
## 69: SELM SELENOM
## 70: NBPF16 NBPF15
## 71: RPL9P9 RPL9P8
## sym_in_data sym_limma
gene_info[, gene_symbol := sym_in_data]
gene_info[which(sym_in_data != sym_limma), gene_symbol := sym_limma]
dim(gene_info)## [1] 1259 3
## sym_in_data sym_limma gene_symbol
## 1: C1orf112 C1orf112 C1orf112
## 2: MAD1L1 MAD1L1 MAD1L1
## 3: ICA1 ICA1 ICA1
## 4: NDUFAF7 NDUFAF7 NDUFAF7
## 5: ST7 ST7 ST7
## t1
## 1 2
## 1255 2
## sym_in_data sym_limma gene_symbol
## 1: HIST1H2BD H2BC5 H2BC5
## 2: HIST1H2BC H2BC5 H2BC5
## 3: RPL9P8 RPL9P8 RPL9P8
## 4: RPL9P9 RPL9P8 RPL9P8
Gene set annotations (by gene symbols) were downloaded from MSigDB website.
gmtfile = list()
gmtfile[["reactome"]] = "../Annotation/c2.cp.reactome.v2023.2.Hs.symbols.gmt"
gmtfile[["go_bp"]] = "../Annotation/c5.go.bp.v2023.2.Hs.symbols.gmt"
gmtfile[["immune"]] = "../Annotation/c7.all.v2023.2.Hs.symbols.gmt"
pathways = list()
for(k1 in names(gmtfile)){
pathways[[k1]] = gmtPathways(gmtfile[[k1]])
}
names(pathways)## [1] "reactome" "go_bp" "immune"
## reactome go_bp immune
## 1692 7647 5219
Filter gene sets for size between 10 and 500.
## $reactome
## 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%
## 5.0 7.0 9.0 12.0 17.0 23.0 31.0 44.0 71.8 120.9 1463.0
##
## $go_bp
## 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%
## 5.0 6.0 8.0 10.0 14.0 19.0 29.0 46.0 80.8 183.0 1966.0
##
## $immune
## 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%
## 5 162 193 197 199 199 200 200 200 200 1992
## [1] 1259 3
## sym_in_data sym_limma gene_symbol
## 1: C1orf112 C1orf112 C1orf112
## 2: MAD1L1 MAD1L1 MAD1L1
max_n2kp = 10
goseq_res = NULL
for(k in 1:length(gene_sets)){
if(length(gene_sets[[k]]) < 10) { next }
print(k)
set_k = paste0("set_", k)
print(gene_sets[[k]])
genes = gene_info$sym_in_data %in% gene_sets[[k]]
names(genes) = gene_info$gene_symbol
table(genes)
pwf = nullp(genes, "hg38", "geneSymbol")
for(k1 in names(pathways)){
p1 = pathways[[k1]]
res1 = goseq(pwf, "hg38", "geneSymbol",
gene2cat=goseq:::reversemapping(p1))
res1$FDR = p.adjust(res1$over_represented_pvalue, method="BH")
nD = sum(res1$FDR < 0.05)
if(nD > 0){
res1 = res1[order(res1$FDR),][1:min(nD, max_n2kp),]
res1$category = gsub("REACTOME_|GOBP_", "", res1$category)
res1$category = gsub("_", " ", res1$category)
res1$category = tolower(res1$category)
res1$category = substr(res1$category, start=1, stop=120)
goseq_res[[set_k]][[k1]] = res1
}
}
}## [1] 1
## [1] "MRPS33" "MTFMT" "CPD" "CUL9" "CCR2"
## [6] "PARP2" "C11orf1" "C7orf55" "GLB1" "ETFDH"
## [11] "MRPL48" "HIST1H2BC" "ZNF765" "TTC37" "HIST2H2AA4"
## [16] "HIST1H2BN"
## [1] 2
## [1] "PTP4A1" "FAHD2A" "MYB" "DTD2"
## [5] "DPH6" "NARF" "ZBTB39" "FBXO22"
## [9] "BOLA1" "RP11-383G10.3" "UBA52P6" "RP11-365H23.1"
## [13] "RPS27AP2" "ADH5P4" "PTGES3P1" "AC007041.2"
## [17] "RP11-382J24.2" "LEF1" "SIK1" "MTND4P12"
## [21] "RP11-727F15.9"
## [1] 3
## [1] "REC8" "DUS4L" "GALK1" "IFNG" "GNPTAB"
## [6] "MCM9" "GORASP1" "STK25" "STYXL1" "TMEM218"
## [11] "LRRC28" "SPATA13" "EDARADD" "PIK3IP1" "ARRDC2"
## [16] "SLC38A2" "AMICA1" "PFKFB3" "RBKS" "RP11-44N11.1"
## [1] 4
## [1] "SH3BP2" "SNX14" "TAF10" "B2M" "AFMID" "TMSB4X" "IFRD1"
## [8] "RGPD5" "CD44" "ARHGEF1" "STK17B" "RGCC" "EGR1" "FOSB"
## [15] "JUND" "BTG2" "CXCR5" "TTC3" "RGPD6" "MALAT1"
## [1] 5
## [1] "CD200" "RP3-324O17.4" "ZNF419" "ZNF684"
## [5] "IL10" "RHEBL1" "C19orf55" "ZNF93"
## [9] "PIGW" "C2orf76" "ANAPC10P1" "RSC1A1"
## [13] "RP11-247I13.6" "CTC-338M12.4" "RP11-262H14.3" "CTB-4E7.1"
## [17] "HMGB1P24" "AP000462.1" "RP11-307P22.1" "RP11-638I2.10"
## [21] "RP11-1012A1.7" "TRAV30"
## [1] 6
## [1] "PIM2" "IFI6" "USF1" "IFI27"
## [5] "RP11-23P13.6" "ZNF80" "EIF2S3L" "ABHD16B"
## [9] "ZNF649" "SGMS1" "TRBV20-1" "TRAV19"
## [13] "TRAV24" "RP4-800M22.1" "RPS11P5" "RP11-288E14.2"
## [17] "AC144530.1" "RP11-693N9.2" "EEF1A1P10" "RP11-705C15.5"
## [1] 7
## [1] "VPS37A" "THAP2" "TRAV16" "TRAV21"
## [5] "RP1-102E24.1" "RP3-340B19.5" "RP11-2F9.3" "RPL19P12"
## [9] "RP11-270C12.3" "PGAM4" "GS1-124K5.2" "RPL19P21"
## [13] "AC090804.1" "RP11-446E9.1" "CTD-2301A4.3" "CTB-33G10.1"
## [17] "RNF138P1" "MIR146A" "HSPA8P5" "RP11-511H9.4"
## [21] "RP11-304L19.5"
## [1] 8
## [1] "CPQ" "RCAN3" "C1orf63" "NBPF14"
## [5] "PCNXL2" "NBPF10" "AC018720.10" "LDOC1"
## [9] "TSHZ2" "NBPF16" "HLA-J" "AC084018.1"
## [13] "HCG4P5" "RPL10P3" "HLA-W" "LINC00674"
## [17] "PSMD6-AS2" "ZNF10" "SLC7A5P1" "CTD-2017D11.1"
## [1] 9
## [1] "RGS1" "GMPR2" "PRMT7" "TRANK1"
## [5] "TRAPPC11" "CXCR6" "COA3" "CMC1"
## [9] "MT1X" "CCL3L1" "TECPR1" "TRGC2"
## [13] "PET100" "PVT1" "CCL3L3" "RP11-345J4.6"
## [17] "EIF1" "RP11-572P18.1" "RPS3AP6" "CTD-2031P19.4"
## [1] 10
## [1] "ZNF85" "MTHFS" "SCRN2" "FAM122B" "TMEM155"
## [6] "LAIR2" "ZBTB8OS" "ASB8" "GEN1" "ARHGAP11B"
## [11] "TMEM194B" "HIST1H4C" "ZNF28" "CRYGS" "AC098614.2"
## [16] "BDH2P1" "GAPDHP2" "RP11-265N6.2" "NPAT" "TCEAL3"
## [21] "TMEM9B-AS1"
## [1] 11
## [1] "REXO1" "POP4" "C3orf14" "GALE" "TMEM214" "CCDC77" "CKS2"
## [8] "UROD" "CCNB1" "FHOD1" "RPP30" "MPPE1" "GALK2" "CCNB2"
## [15] "POP5" "RUFY2" "MYCBP" "KIR3DL2"
## [1] 12
## [1] "CDKN3" "TIMM9" "TXNDC17" "FIGNL1" "TIMM10"
## [6] "TROAP" "GEMIN6" "NDUFAF6" "CCDC28B" "PAQR4"
## [11] "C11orf82" "ZFYVE19" "UBE2C" "TYMS" "SESTD1"
## [16] "NCKIPSD" "MDP1" "HIST1H3G" "PMF1-BGLAP"
## [1] 13
## [1] "SLAMF8" "TNFAIP8L2" "EFCAB7" "RPSAP54"
## [5] "RP11-12M9.4" "RPS19P1" "CALM2P3" "ARPC3P1"
## [9] "PRDX3P1" "SKP1P1" "AC093391.2" "AP000476.1"
## [13] "RPSAP4" "RPS19P3" "AC018462.3" "GAPDHP60"
## [17] "RP11-402J6.3" "RP13-638C3.4" "RP11-209D14.4" "CTB-52I2.4"
## [21] "RP11-282O18.6"
## [1] 14
## [1] "GCFC2" "MFAP3" "CD84" "CDKL1" "XAF1"
## [6] "MFSD9" "CASP1" "TMEM19" "TATDN1" "FCRL3"
## [11] "ZNF720" "SH2D1B" "CPT1B" "UBXN2B" "LINC00426"
## [16] "RBBP4P1" "SNORD3A" "AC024592.12" "MS4A1" "PLA2G4B"
## [1] 15
## [1] "GPN2" "ZBED2" "PLA2G6" "TXK"
## [5] "CLDND1" "MAN1C1" "RASGEF1B" "FRMD4A"
## [9] "POLR2J3" "YPEL2" "DENND5A" "RP11-514P8.6"
## [13] "RP11-91K8.4" "MTND1P23" "ACTBP12" "RP1-187B23.1"
## [17] "LINC00944" "RP11-335G20.7" "RP11-1000B6.3" "KCNQ1OT1"
## [1] 16
## [1] "RNF10" "BCL3" "DERL2" "MBNL3"
## [5] "FMR1" "METTL8" "KATNBL1" "TPGS2"
## [9] "TMOD3" "LINS" "LPP" "KBTBD2"
## [13] "KATNA1" "C17orf89" "PPIL3" "ATP5J2"
## [17] "RP11-386G11.10" "CLK1" "ABLIM1" "TC2N"
## [1] 17
## [1] "MED24" "TM7SF3" "POLD3" "DUSP12" "PLA2G15" "RASSF4" "NR2C1"
## [8] "NQO2" "ASNSD1" "TBCD" "UCK2" "PPP4R1" "NTAN1" "PPARA"
## [15] "ZNF511" "TRGV10" "TREX1" "MRPL33" "ABHD14A" "GGT7"
## [1] 18
## [1] "DYRK4" "NSMAF" "NHP2P1" "IL18RAP"
## [5] "FAM45A" "MTERF" "NUDT7" "AMDHD2"
## [9] "NAALADL1" "EFCAB4A" "WDR53" "ZNF749"
## [13] "TRGV5" "RP11-247I13.3" "RP11-119F19.2" "HNRNPKP2"
## [17] "PA2G4P4" "EEF1A1P14" "TRGV7" "KB-1507C5.2"
## [21] "TRAV1-2" "CTB-31O20.3"
## [1] 19
## [1] "PUS7L" "KHDC1" "SFR1" "KIAA1841" "LRRC45" "NME6"
## [7] "SWI5" "ZNF613" "LCORL" "LRTOMT" "CCDC30" "ENTPD5"
## [13] "METTL6" "MYL5" "PIN4P1" "SAPCD1" "GK3P" "RAB1C"
## [19] "GOT2P3" "ZEB1-AS1" "RPL13P5"
## [1] 20
## [1] "PRSS23" "AMFR" "MRPL55" "RPS17" "TNF" "PPP2R5C" "RPL34"
## [8] "RPS12" "RPL21" "RPL5" "BTG1" "PDCD4" "ZFP36L2" "RPL9"
## [15] "RPS18"
## [1] 21
## [1] "CCL3" "GLT8D1" "TBPL1" "FAR2" "GZMB" "MAN2B1"
## [7] "SLC12A4" "MT2A" "ANXA1" "IRF4" "GCHFR" "GTF2A2"
## [13] "C1orf123" "EXTL2" "METTL18" "PRF1" "NCR3" "UBE2D3P2"
## [19] "NR4A2" "CD55"
## [1] 22
## [1] "MAP3K13" "ZNF268" "FAH" "EGR2"
## [5] "TMSB15B" "MEI1" "ADCK5" "SNHG11"
## [9] "ZNF257" "ZNF525" "LYPLA1P3" "ZNF826P"
## [13] "LINC00493" "SNRPEP4" "FAM200B" "RP11-552M11.4"
## [17] "SRP14-AS1" "TPM3P6" "USP30-AS1" "RP11-1094M14.7"
## [21] "RIMKLB"
## [1] 23
## [1] "OCEL1" "DHDDS" "PGPEP1" "RRNAD1" "RNF25"
## [6] "SLC39A13" "RPSAP47" "ZNF544" "TRAV8-2" "RP4-694B14.5"
## [11] "DNASE1" "PTGES3P2" "CCT8P1" "RP11-350G8.3" "SEPT7P7"
## [16] "RP11-782C8.1" "TSEN15P1" "FAM21FP" "ETF1P2" "SF3A3P2"
## [21] "RP4-739H11.4"
## [1] 24
## [1] "STAU2" "P2RX5" "ERGIC2" "DHPS" "THOC5" "DDX49" "SRM"
## [8] "IMP4" "MAP3K6" "LIMD1" "GLUD1" "CXXC1" "MITD1" "MARS"
## [15] "NT5DC1" "DUSP28" "PIK3R4" "TNRC6C" "LMO7" "PPP1R10"
## [1] 25
## [1] "CRTAM" "HAVCR1" "TNFSF10" "PTGER2" "IFI44L"
## [6] "IFI44" "APH1B" "SPON2" "MT1E" "HOPX"
## [11] "TRBV9" "TRAV13-1" "MIR155HG" "AC092580.4" "RN7SL749P"
## [16] "AC005329.7" "RNU1-125P" "AC006483.1" "MIR142" "AC006129.4"
## [1] 26
## [1] "NOP16" "WDR3" "GTPBP10" "NOP2" "DKC1" "MTO1"
## [7] "BCAS3" "PELP1" "UTP23" "PHAX" "ZFP62" "ZNF431"
## [13] "ZNF770" "C9orf114" "ZBTB48" "RASGRF2" "SGSM2" "FAM102A"
## [19] "DNHD1"
## [1] 27
## [1] "C14orf93" "MS4A6A" "TMEM138" "TRPT1"
## [5] "GSTM4" "TRDV1" "RP13-383K5.4" "CD27-AS1"
## [9] "TSTD1" "RNU2-2P" "AC012318.3" "RP11-773D16.1"
## [13] "RP4-742C19.12" "TRGV2" "MAPKAPK5-AS1" "AC104820.2"
## [17] "LINC00484" "RP11-305L7.3" "RP11-416A17.6" "CTD-2521M24.9"
## [1] 28
## [1] "TNFRSF9" "PON2" "KLRB1" "TEP1" "ACTR10" "KLRC1"
## [7] "BORA" "SPPL2A" "PANK4" "MGAT4B" "FCGR3B" "SLC38A9"
## [13] "WRB" "CCL4L2" "FCGR3A" "MSH5" "CCL4L1" "KLRC2"
## [19] "TCTEX1D2"
## [1] 29
## [1] "PECR" "XCL1" "XCL2" "CX3CR1" "GTSF1"
## [6] "ZNF683" "FCRL6" "C10orf128" "TRBV4-2" "TRAV14DV4"
## [11] "TRAV29DV5" "TRAV35" "KANSL1-AS1" "RP1-182O16.1" "PATL2"
## [16] "AKAP2" "RPL37P23" "RP11-81H14.2" "AC139149.1" "TPMTP1"
## [21] "LUZP6"
## [1] 30
## [1] "ZNRD1" "MICALL1" "CAPN15" "POLR2I" "CD86" "MTFR1L" "IFIT3"
## [8] "IFIT2" "UXT" "HRSP12" "RSAD2" "CLSTN3" "KSR1" "MED11"
## [15] "POLR2H" "IL15" "TMEM107" "IFIT1" "TRGV3"
## [1] 31
## [1] "MDGA1" "CDCA7L" "ZNF816" "RPS12P26"
## [5] "TCF7" "CCR7" "C1orf162" "LDLRAP1"
## [9] "PLXDC1" "FILIP1L" "TMEM150A" "CD248"
## [13] "POU6F1" "TRABD2A" "MIR4461" "CTD-3092A11.1"
## [17] "RP11-632K20.7" "CHRM3-AS2" "RP11-285F7.2" "WHAMMP2"
## [21] "RP1-313I6.12"
## [1] 32
## [1] "PTPN18" "TIPIN" "NSMCE4A" "NCOA7"
## [5] "AHI1" "APTX" "RNF121" "HIRIP3"
## [9] "MFSD6" "ALG8" "PRIMPOL" "CTSB"
## [13] "GNGT2" "KIR3DL1" "TBCA" "MZT2A"
## [17] "DALRD3" "TNFRSF18" "KIR2DS4" "RP11-271C24.3"
## [1] 33
## [1] "PDE6B" "ZNF678" "RP4-728D4.2" "RP11-493L12.6"
## [5] "DGKA" "NEK1" "FBXO32" "EPB41"
## [9] "PLGLB1" "RPS26P6" "RPS3AP26" "RPL9P8"
## [13] "RPL3P4" "AP001468.1" "RPL9P9" "RPL9P7"
## [17] "ATP1B3-AS1" "RP13-488H8.1" "RP11-320M16.1"
## [1] 34
## [1] "PMM1" "TFDP2" "C2orf43" "CCRL2"
## [5] "NARG2" "TMEM241" "SLC2A8" "SLC35A5"
## [9] "FUOM" "DRAM2" "LZTFL1" "TRIM68"
## [13] "TMEM134" "SMIM19" "SLC9A9" "TRDC"
## [17] "RP11-254B13.4" "BHLHE40-AS1" "RP11-844P9.3" "RP11-265N6.3"
## [21] "MIR3615" "RP11-686D22.10"
## [1] 35
## [1] "CHI3L2" "NT5C2" "WARS" "TMPRSS3"
## [5] "DNAJC18" "PSMG1" "FAM49A" "PSMA6P1"
## [9] "AC009403.2" "RNU2-63P" "CDC42P1" "AC002331.1"
## [13] "RP11-32B5.1" "RP3-395M20.8" "RN7SL56P" "GMPSP1"
## [17] "RPPH1" "MIR3661" "AL161784.1" "CTD-3214H19.4"
## [21] "CTD-2545M3.2" "RP11-297D21.4"
## [1] 36
## [1] "C9orf89" "CMAHP" "RASGRP2" "MGAT4A" "HECA"
## [6] "NR4A3" "FAM117B" "PLAC8" "ACRC" "TIPARP"
## [11] "PRMT10" "CHD2" "PLK3" "ZBTB20" "ANKRD37"
## [16] "SELL" "AC021593.1" "LTB" "RP11-747H7.3" "RP11-434H6.7"
## [1] 37
## [1] "NDUFAF7" "SMG6" "IL4R" "USP48" "WDR7"
## [6] "UTP6" "EMG1" "L3HYPDH" "DEF8" "SEC24D"
## [11] "AGPAT6" "BAP1" "GNG4" "CHAMP1" "MBD5"
## [16] "CATSPER2P1" "FOXP1" "DHRS3" "SATB1"
## [1] 38
## [1] "RABGAP1" "NTHL1" "GBA2" "DHRS12" "LIN7B"
## [6] "ILVBL" "LIG1" "SNX8" "PRR4" "RCOR3"
## [11] "NUP85" "CHTF18" "FOPNL" "TTLL4" "COG5"
## [16] "KNTC1" "IFRD2" "RP11-33B1.1" "PMEPA1" "MPP7"
## [1] 39
## [1] "WDR13" "GABPB1" "SLC11A2" "LOXL3" "FANCL"
## [6] "ID3" "IRF8" "PMAIP1" "PLIN2" "CETN3"
## [11] "MTHFD2L" "NFU1" "PCGF5" "CADM1" "BTLA"
## [16] "TDRD7" "HIST1H4J" "FANCG" "AL358333.1"
## [1] 40
## [1] "CAPN12" "TRAV12-2" "PLXND1" "APBA2" "UAP1"
## [6] "TMEM66" "AOAH" "CDC42EP3" "ZNF589" "NPIPB3"
## [11] "NPIPB4" "RPS26" "FAM169A" "KLRK1" "MTND2P28"
## [16] "CTD-2328D6.1" "RPL41" "NPIPB5" "MTATP6P1" "QRSL1P3"
## [21] "RP11-51J9.5"
for(n1 in names(goseq_res)){
k = as.numeric(gsub("set_", "", n1))
print(n1)
print(gene_sets[[k]])
print(goseq_res[[n1]])
}## [1] "set_3"
## [1] "REC8" "DUS4L" "GALK1" "IFNG" "GNPTAB"
## [6] "MCM9" "GORASP1" "STK25" "STYXL1" "TMEM218"
## [11] "LRRC28" "SPATA13" "EDARADD" "PIK3IP1" "ARRDC2"
## [16] "SLC38A2" "AMICA1" "PFKFB3" "RBKS" "RP11-44N11.1"
## $go_bp
## category over_represented_pvalue
## 241 carbohydrate phosphorylation 6.272519e-07
## under_represented_pvalue numDEInCat numInCat FDR
## 241 1 4 5 0.002563578
##
## [1] "set_11"
## [1] "REXO1" "POP4" "C3orf14" "GALE" "TMEM214" "CCDC77" "CKS2"
## [8] "UROD" "CCNB1" "FHOD1" "RPP30" "MPPE1" "GALK2" "CCNB2"
## [15] "POP5" "RUFY2" "MYCBP" "KIR3DL2"
## $go_bp
## category over_represented_pvalue under_represented_pvalue
## 3987 trna 5 leader removal 8.087298e-06 1.0000000
## 3698 rna 5 end processing 2.995770e-05 0.9999999
## 3699 rna capping 2.995770e-05 0.9999999
## 3986 trna 5 end processing 2.995770e-05 0.9999999
## numDEInCat numInCat FDR
## 3987 3 3 0.03060928
## 3698 3 4 0.03060928
## 3699 3 4 0.03060928
## 3986 3 4 0.03060928
##
## $immune
## category over_represented_pvalue
## 3317 gse36826 wt vs il1r ko skin staph aureus inf up 8.40456e-06
## under_represented_pvalue numDEInCat numInCat FDR
## 3317 0.9999998 5 17 0.04285485
##
## [1] "set_20"
## [1] "PRSS23" "AMFR" "MRPL55" "RPS17" "TNF" "PPP2R5C" "RPL34"
## [8] "RPS12" "RPL21" "RPL5" "BTG1" "PDCD4" "ZFP36L2" "RPL9"
## [15] "RPS18"
## $reactome
## category
## 890 srp dependent cotranslational protein targeting to membrane
## 256 eukaryotic translation initiation
## 734 response of eif2ak4 gcn2 to amino acid deficiency
## 255 eukaryotic translation elongation
## 691 regulation of expression of slits and robos
## 547 nonsense mediated decay nmd
## 808 selenoamino acid metabolism
## 986 translation
## 117 cellular response to starvation
## 869 signaling by robo receptors
## over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 890 1.418360e-05 0.9999992 7 29
## 256 1.575643e-05 0.9999991 7 30
## 734 1.751556e-05 0.9999990 7 30
## 255 1.766013e-05 0.9999990 7 31
## 691 1.991804e-05 0.9999988 7 31
## 547 2.074074e-05 0.9999988 7 31
## 808 2.347818e-05 0.9999986 7 32
## 986 2.350223e-05 0.9999983 8 46
## 117 2.692313e-05 0.9999983 7 33
## 869 2.696435e-05 0.9999983 7 33
## FDR
## 890 0.002755757
## 256 0.002755757
## 734 0.002755757
## 255 0.002755757
## 691 0.002755757
## 547 0.002755757
## 808 0.002755757
## 986 0.002755757
## 117 0.002755757
## 869 0.002755757
##
## $go_bp
## category over_represented_pvalue under_represented_pvalue
## 515 cytoplasmic translation 1.573413e-06 0.9999999
## numDEInCat numInCat FDR
## 515 7 33 0.006430541
##
## $immune
## category over_represented_pvalue
## 1974 gse22886 naive tcell vs dc up 4.358409e-07
## under_represented_pvalue numDEInCat numInCat FDR
## 1974 1 7 33 0.002222353
##
## [1] "set_26"
## [1] "NOP16" "WDR3" "GTPBP10" "NOP2" "DKC1" "MTO1"
## [7] "BCAS3" "PELP1" "UTP23" "PHAX" "ZFP62" "ZNF431"
## [13] "ZNF770" "C9orf114" "ZBTB48" "RASGRF2" "SGSM2" "FAM102A"
## [19] "DNHD1"
## $go_bp
## category over_represented_pvalue under_represented_pvalue
## 3693 ribosome biogenesis 8.642201e-07 1.0000000
## 1442 ncrna processing 5.031860e-06 0.9999997
## numDEInCat numInCat FDR
## 3693 7 35 0.003532067
## 1442 7 42 0.010282606
##
## [1] "set_30"
## [1] "ZNRD1" "MICALL1" "CAPN15" "POLR2I" "CD86" "MTFR1L" "IFIT3"
## [8] "IFIT2" "UXT" "HRSP12" "RSAD2" "CLSTN3" "KSR1" "MED11"
## [15] "POLR2H" "IL15" "TMEM107" "IFIT1" "TRGV3"
## $immune
## category
## 5059 scherer pbmc apsv wetvax age 18 32yo 2 to 4dy up
## 2344 gse26030 th1 vs th17 day5 post polarization up
## 3498 gse37533 pparg1 foxp3 vs pparg2 foxp3 transduced cd4 tcell pioglitazone treated dn
## 5061 scherer pbmc apsv wetvax age 18 32yo 5 to 7dy up
## 1648 gse21360 primary vs quaternary memory cd8 tcell up
## 4652 gse7509 dc vs monocyte with fcgriib stim dn
## 4763 gse8835 cd4 vs cd8 tcell cll patient up
## 466 gse14000 unstim vs 16h lps dc translated rna dn
## 1452 gse19888 adenosine a3r inh vs act with inhibitor pretreatment in mast cell up
## 1317 gse18281 subcapsular vs central cortical region of thymus dn
## over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 5059 1.576970e-06 1.0000000 5 12
## 2344 3.135367e-06 0.9999999 6 24
## 3498 4.853328e-06 0.9999998 6 25
## 5061 2.606480e-05 0.9999985 6 34
## 1648 4.127326e-05 0.9999982 5 22
## 4652 5.214184e-05 0.9999976 5 23
## 4763 5.405585e-05 0.9999975 5 23
## 466 6.332070e-05 0.9999970 5 23
## 1452 7.653433e-05 0.9999961 5 25
## 1317 9.103684e-05 0.9999952 5 26
## FDR
## 5059 0.007993618
## 2344 0.007993618
## 3498 0.008249039
## 5061 0.033226104
## 1648 0.039375824
## 4652 0.039375824
## 4763 0.039375824
## 466 0.040359029
## 1452 0.042095927
## 1317 0.042095927
##
## [1] "set_34"
## [1] "PMM1" "TFDP2" "C2orf43" "CCRL2"
## [5] "NARG2" "TMEM241" "SLC2A8" "SLC35A5"
## [9] "FUOM" "DRAM2" "LZTFL1" "TRIM68"
## [13] "TMEM134" "SMIM19" "SLC9A9" "TRDC"
## [17] "RP11-254B13.4" "BHLHE40-AS1" "RP11-844P9.3" "RP11-265N6.3"
## [21] "MIR3615" "RP11-686D22.10"
## $immune
## category
## 4783 gse8921 unstim vs tlr1 2 stim monocyte 12h up
## 2532 gse2770 tgfb and il4 vs il4 treated act cd4 tcell 6h up
## over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 4783 1.594931e-06 1.0000000 5 16
## 2532 4.743483e-06 0.9999999 5 19
## FDR
## 4783 0.008132555
## 2532 0.012093510
## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 8958397 478.5 17067254 911.5 NA 17067254 911.5
## Vcells 19166084 146.3 58167881 443.8 65536 78485130 598.8
## R version 4.2.3 (2023-03-15)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.4.1
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] TxDb.Hsapiens.UCSC.hg38.knownGene_3.16.0
## [2] GenomicFeatures_1.50.4
## [3] GenomicRanges_1.50.2
## [4] GenomeInfoDb_1.34.9
## [5] org.Hs.eg.db_3.16.0
## [6] AnnotationDbi_1.60.2
## [7] IRanges_2.32.0
## [8] S4Vectors_0.36.2
## [9] Biobase_2.58.0
## [10] BiocGenerics_0.44.0
## [11] goseq_1.50.0
## [12] geneLenDataBase_1.34.0
## [13] BiasedUrn_2.0.10
## [14] fgsea_1.24.0
## [15] biomaRt_2.54.1
## [16] limma_3.54.2
## [17] tidyr_1.3.0
## [18] ggpubr_0.6.0
## [19] ggplot2_3.4.2
## [20] data.table_1.14.8
##
## loaded via a namespace (and not attached):
## [1] nlme_3.1-162 matrixStats_1.0.0
## [3] bitops_1.0-7 bit64_4.0.5
## [5] filelock_1.0.2 progress_1.2.2
## [7] httr_1.4.6 tools_4.2.3
## [9] backports_1.4.1 bslib_0.4.2
## [11] utf8_1.2.3 R6_2.5.1
## [13] mgcv_1.8-42 DBI_1.1.3
## [15] colorspace_2.1-0 withr_2.5.0
## [17] tidyselect_1.2.0 prettyunits_1.1.1
## [19] bit_4.0.5 curl_5.0.1
## [21] compiler_4.2.3 cli_3.6.1
## [23] xml2_1.3.4 DelayedArray_0.24.0
## [25] rtracklayer_1.58.0 sass_0.4.5
## [27] scales_1.2.1 rappdirs_0.3.3
## [29] Rsamtools_2.14.0 stringr_1.5.0
## [31] digest_0.6.31 rmarkdown_2.21
## [33] XVector_0.38.0 pkgconfig_2.0.3
## [35] htmltools_0.5.5 MatrixGenerics_1.10.0
## [37] dbplyr_2.3.2 fastmap_1.1.1
## [39] rlang_1.1.0 rstudioapi_0.14
## [41] RSQLite_2.3.1 BiocIO_1.8.0
## [43] jquerylib_0.1.4 generics_0.1.3
## [45] jsonlite_1.8.4 BiocParallel_1.32.6
## [47] dplyr_1.1.2 car_3.1-2
## [49] RCurl_1.98-1.12 magrittr_2.0.3
## [51] GO.db_3.16.0 GenomeInfoDbData_1.2.9
## [53] Matrix_1.6-4 Rcpp_1.0.10
## [55] munsell_0.5.0 fansi_1.0.4
## [57] abind_1.4-5 lifecycle_1.0.3
## [59] stringi_1.7.12 yaml_2.3.7
## [61] carData_3.0-5 SummarizedExperiment_1.28.0
## [63] zlibbioc_1.44.0 BiocFileCache_2.6.1
## [65] grid_4.2.3 blob_1.2.4
## [67] parallel_4.2.3 crayon_1.5.2
## [69] lattice_0.20-45 splines_4.2.3
## [71] Biostrings_2.66.0 cowplot_1.1.1
## [73] hms_1.1.3 KEGGREST_1.38.0
## [75] knitr_1.44 pillar_1.9.0
## [77] rjson_0.2.21 ggsignif_0.6.4
## [79] codetools_0.2-19 fastmatch_1.1-3
## [81] XML_3.99-0.14 glue_1.6.2
## [83] evaluate_0.20 png_0.1-8
## [85] vctrs_0.6.2 gtable_0.3.3
## [87] purrr_1.0.1 cachem_1.0.7
## [89] xfun_0.39 broom_1.0.4
## [91] restfulr_0.0.15 rstatix_0.7.2
## [93] tibble_3.2.1 GenomicAlignments_1.34.1
## [95] memoise_2.0.1